---
title: 'Replication code for "Financial Crises and the Selection and Survival of Women Finance Ministers" American Political Science Review'
author: "Brenna Armstrong, Tiffany D. Barnes, Daina Chiba, and Diana Z. O’Brien"
date: "2023/06/22"
output: 
  html_document:
    toc: true
    toc_float: true
    highlight: tango
    code_folding: hide
---

<style>

table, th, td {
    font-size: 15px;
        width: 100%;
}

</style>

# Introduction

To knit this RMarkdown file, please place the replication data files (`ABCO_data1.csv` and `ABCO_data2.csv`) in the same folder as this file. You would need to have the `pacman` package installed before running the code. All the other necessary R packages will be installed automatically if not installed yet. 

The file has four main sections:  
- **1. Descriptive Analysis** provides descriptions of the data such as the choropleth map in the main manuscript (Figure 1) and various statistics mentioned in the text.  
- **2. Women's Initial Access** replicates our analyses of women’s entrance into finance ministries that are presented in the main text or in the online appendix.   
- **3. Finance Minister Tenure** replicates our analyses of risk of a finance minister leaving office that are presented in the main text or in the online appendix.  
- **4. Minister Background** replicates our analyses of ministers' education and professional background that are discussed in the main text and in the online appendix.  

The results in the paper were generated using R ver 4.3.0. The versions of the R packages used in the analyses are provided in the **Session Information** section at the bottom of the RMarkdown output (`ABCO_Replication.html`).

## Preparation

Load relevant packages and define some colors.

```{r, message = FALSE}
## Unload all loaded packages if any
pacman::p_unload(pacman::p_loaded(), character.only = TRUE)

## Load necessary packages
pacman::p_load(tidyverse, tidylog, 
               DT, maps, countrycode, pltesim, 
               stargazer, multiwayvcov, lmtest, survival, 
               knitr, kableExtra, rcartocolor, ggdist, gghalves, patchwork)

## custom colors
my_pal <- carto_pal(n = 8, name = "Bold")[c(7, 3, 2, 1)]
```

Load the data.

```{r, message = FALSE}
## Data for main analyses
df <- read_csv("ABCO_data1.csv")
## Data for background information analyses
df_appC <- read_csv("ABCO_data2.csv")
```


Define helper functions

```{r}
## Functions to calculate clustered standard errors from the fitted object
### For hazard rate
get_clse_hr <- function(fit){
  sqrt(diag(fit $ var))
  }

### For hazard ratio (exponentiated coefficients)
get_clse_or <- function(fit){
  or <- exp(coef(fit))
  var.diag <- diag(fit $ var)
  return(sqrt(or^2 * var.diag))
}

## Functions to extract relevant results
extract_hr <- function(fit){
  df <- summary(fit) $ conf.int
  pe <- df[rownames(df) == "crisis_mon", "exp(coef)"]
  low <- df[rownames(df) == "crisis_mon", "lower .95"]
  up <- df[rownames(df) == "crisis_mon", "upper .95"]
  data.frame(pe = pe, low = low, up = up)
}

## Function to add AIC to the model object (so that stargazer recognizes them)
add_AIC <- function(fit){
  fit $ AIC <- AIC(fit)
  return(fit)
}


## Functions to test the PH assumption 

## The following functions are taken from Park and Hendry's (2015) replication
## package (park_hendry_ajps03.r). We modified them so that we focus on 
## KM transformation (default) and rank transformation. 

zphTable <- function(cox.fit, digits) {
  # Performs cox.zph function from the survival package using the four
  #   available time transformations and outputs one table with side-by-side
  #   presentation of results
  # Args:
  #   cox.fit: fit of a Cox model using the coxph function
  #   digits:  desired number of digits for presentation of results
  tab <- round(cbind(cox.zph(cox.fit, transform = 'km')$table,
                     cox.zph(cox.fit, transform = 'rank')$table), digits = digits)
  cols <- colnames(cox.zph(cox.fit)$table)
  colnames(tab) <- c(paste('km.', cols, sep = ''),
                     paste('rank.', cols, sep = ''))    
  tab
}

calcNPH <- function(fit, tfunc = "id", seed = 12345, nsims = 1000){
  if (tfunc == "id"){ft <- function(t) t}
  if (tfunc == "log"){ft <- function(t) log(t)}
  if (tfunc == "sqrt"){ft <- function(t) sqrt(t)}
  set.seed(seed)
  simb <- MASS::mvrnorm(n = nsims, mu = coef(fit), Sigma = vcov(fit))
  col_need <- which(colnames(simb) %in% c("crisis_mon", "tt(crisis_mon)"))
  simb <- simb[, col_need]
  time <- seq(from = 1, to = 120, by = 3)
  xmat <- cbind(1, ft(time))
  xb <- exp(xmat %*% t(simb))
  cut_points <- c(0.025, 0.05, 0.5, 0.95, 0.975)
  est <- apply(xb, 1, quantile, cut_points) |> t()
  est <- cbind(time = time, est) |> as.data.frame()
  colnames(est) <- c("time", "low95", "low90", "median", "upp90", "upp95")
  return(est)
}
```



# 1. Descriptive Analysis


## Choropleth Map (Figure 1)

Load world map data.

```{r}
world <- map_data("world") |> 
  mutate(
    iso3c = countrycode(
    sourcevar = region, 
    origin = "country.name", 
    destination = "iso3c")
    )
## Manually code ISO code for a few of them. 
world $ iso3c[ world $ region == "Kosovo"] <- "XKX"
world $ iso3c[ world $ region == "Micronesia"] <- "FSM"
```

Number of women per countries

```{r}
fm_sum <- df |> 
  filter(fm_name != "Unknown") |> 
  select(iso3c, fm_name, fm_gender) |> 
  unique() |> 
  group_by(iso3c) |> 
  summarise(n_ministers = n(), 
            n_women = sum(fm_gender)) |> 
  ungroup()
```

```{r}
table(fm_sum $ n_women)
```

Collapse the categories for 3+

```{r}
fm_sum |> filter(n_women >= 3)
```

Create a character vector for the number of women.

```{r}
fm_sum <- fm_sum |> 
  mutate(n_women_lab = as.character(n_women), 
         n_women_lab = if_else(n_women >= 3, "3+", n_women_lab))
with(fm_sum, table(n_women_lab, n_women, useNA = "ifany"))
```

Join this data set with the map data.

```{r}
fm_map <- full_join(world, fm_sum)
```

Check missing regions.

```{r}
fm_map |> filter(is.na(long)) |> pull(iso3c)
```

Hong Kong (HKG), Macao (MAC), and Tuvalu (TUV) are too small to see on the map anyway. Chechoslovakia (CSK) and Yugoslavia (YUG) are missing, as they are replaced by their successor countries. 


```{r}
## Convert NA to 0
fm_map <- fm_map |> 
  mutate(n_women_lab = replace_na(n_women_lab, "0"))

with(fm_map, table(n_women_lab, n_women, useNA = "ifany"))

```

```{r}
fm_map |> 
  filter(region != "Antarctica") |> 
  ggplot(aes(x = long, y = lat, group = group, fill = n_women_lab)) + 
  geom_polygon(colour = "black", size = 0.1) + 
  theme_minimal() + 
  theme(legend.position = c(0.6, 0.05),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", color="white")) + 
  xlab("") + ylab("") + labs(fill = "Number of woman\nfinance ministers") + 
  scale_x_discrete(labels=NULL) + scale_y_discrete(labels=NULL) + 
  scale_fill_manual(values=c("gray95", "gray65", "gray30", "black"))
ggsave("Figures/Figure1_Map.pdf", width = 10, height = 5)
```


## First Women Ministers

The code below shows the numbers we report in the main text (in the section titled "Testing the Effect of Financial Crises on Women’s Entrance into Power"). 

Number of countries that had seen a woman finance minister between 1972 and 2017. 

```{r}
with(fm_sum, table(n_women > 0))
```

77 (out of 202) countries had at least one woman finance ministers.

Identify countries with more than 1 women finance ministers.

```{r}
multi.ffm <- df |> 
  filter(fm_gender == 1) |> 
  select(country_name, iso3c, fm_name) |> 
  unique() |> 
  group_by(country_name, iso3c) |> 
  tally()

## Number of countries that have 1 woman finance ministers.
multi.ffm |> filter(n > 1)
```

Those that have 2, 3, or 5

```{r}
multi.ffm |> filter(n==2) |> print(n=Inf) # n = 2. 24
multi.ffm |> filter(n==3) |> print(n=Inf) # n = 3. 6
multi.ffm |> filter(n>3) |> print(n=Inf) # n > 3. 1
```


Listing up the women finance ministers:

```{r}
df |> 
  filter(fm_gender == 1) |> 
  group_by(country_name, fm_name) |> 
  summarise(first_date = min(date)) |> 
  arrange(first_date)
```
The first one is Dora Reluz from Panama in July 1972. 


87 out of 117 were appointed after 2000

```{r}
df |> 
  filter(fm_gender == 1) |> 
  group_by(country_name, fm_name) |> 
  summarise(first_date = min(date)) |> 
  filter(first_date >= "2000-01-01") |> 
  arrange(first_date)
```

## Tally by Regions

We define regions based on COW (GW) code. 

```{r}
region_cut  <- c(0, 200, 400, 600, 700)
region_name <- c("Americas", "Europe", "Africa", "Middle East", "Asia-Pacific")

## Define regions
df <- df |> 
  mutate(COWregion = region_name[findInterval(gwcode, region_cut)])

## Check missing
table(df $ COWregion, useNA = "always")
```

Some values were not matched unambiguously: ABW, BMU, COK, HKG, MAC, SRB, SVU, XKX, YUG

```{r}
df |> 
  filter(is.na(gwcode) == T) |> pull(country_name) |> unique()
```

Fill in NAs manually.

```{r}
df $ COWregion[df $ country_name == "Aruba"] <- "Europe"　# Part of Netherlands / Geographically in the Americas
df $ COWregion[df $ country_name == "Bermuda"] <- "Europe"　# UK's overseas territory / Geographically in Americas?
df $ COWregion[df $ country_name == "Cook Islands"] <- "Asia-Pacific"　# Part of NZ
df $ COWregion[df $ country_name == "China- Hong Kong"] <- "Asia-Pacific"
df $ COWregion[df $ country_name == "China- Macau"] <- "Asia-Pacific"
```

Tally by region

```{r}
df |> 
  filter(fm_gender==1) |> 
  select(COWregion, iso3c) |> 
  unique() |> 
  group_by(COWregion) |> tally()
```

We can see that women have been appointed to the finance portfolio in every region of the world. 


## Minister-Level Statistics

Calculate minister-level summaries that we mention in the text.

```{r}
min_lev <- df |> 
  filter(fm_name != "Unknown") |> 
  group_by(iso3c, fm_name) |> 
  summarise(fm_gender = mean(fm_gender))

table(min_lev $ fm_gender)  
```

We have 2859 + 117 = 2976 unique ministers, of which 117 (4%) are women. 

Spell-level summaries

```{r}
spell_lev <- df |> 
  filter(fm_name != "Unknown") |> 
  group_by(spellid) |> 
  summarise(time = max(tenure_t), 
            fail = max(tenure_d), 
            decade = max(decade), # decade of termination
            year = max(year),     # year of termination
            decade0 = min(decade),    # decade of appointment
            year0 = min(year),    # year of appointment
            fm_gender = mean(fm_gender), 
            fm_gender_f = if_else(fm_gender == 1, "Woman", "Man"), 
            fm_name = first(fm_name),
            country_name = first(country_name),
            left_trunc = min(left_trunc),
            right_cens_max = max(right_cens)
            )
```

```{r}
table(spell_lev $ fm_gender_f)
```

Which women served multiple times?

```{r}
spell_lev |> 
  filter(fm_gender_f == "Woman") |> 
  group_by(fm_name, country_name) |> 
  tally() |> 
  filter(n > 1)
```

## Minister Tenure by Gender (Appendix A2)

Left truncation and right censoring

```{r}
with(spell_lev, table(left_trunc, right_cens_max, useNA = "ifany"))
```

```{r}
spell_lev |> filter(left_trunc == 1 & right_cens_max == 1)
```


```{r}
spell_lev |> filter(right_cens_max == 1) |> pull(fm_gender_f) |> table()
```


We will focus on non-censored spells. We can keep the left-truncated spells because we have records of the origin dates for them.

```{r}
spell_full <- spell_lev |> filter(right_cens_max == 0)
```

Spells that include right-censored ones.

```{r}
table(spell_lev $ fm_gender_f)
```

Spells that exclude right-censored ones.

```{r}
table(spell_full $ fm_gender_f)
```


Average tenure length by gender.

```{r}
spell_full |> 
  filter(fail == 1) |> 
  group_by(fm_gender_f) |> 
  summarise(avg_tenure = median(time))
```

Table A2: Minister Tenure by Gender, 1972-2017

```{r}
spell_stat <- spell_full |> 
  group_by(fm_gender_f) |> 
  summarise(n = n(), 
            Mean = mean(time) |> round(1), 
            Median = median(time) |> round(1), 
            Minimum = min(time) |> round(1), 
            Maximum = max(time) |> round(1), 
            SD = sd(time) |> round(1)) |> 
  as.data.frame()

kable(spell_stat)
```

```{r}
summary(spell_full $ time[spell_full $ fm_gender == 1])
```


```{r}
datatable(spell_stat)
```

Visualization

```{r}
g_tenure <- ggplot(spell_full, 
                   mapping = aes(x = fm_gender_f, y = time, 
                                 color = fm_gender_f, 
                                 fill = fm_gender_f)) + 
  scale_color_manual(values = my_pal, guide = "none") +
  scale_fill_manual(values = my_pal, guide = "none") + 
  labs(x = "", y = "Minister tenure (months)") + 
  geom_boxplot(
    width = .2, fill = "white",
    size = 1.5, outlier.shape = NA
  ) +
  ggdist::stat_halfeye(
    adjust = .33, ## bandwidth
    width = .67, 
    color = NA, ## remove slab interval
    position = position_nudge(x = .15)
  ) +  
  gghalves::geom_half_point(
    side = "l", 
    range_scale = .3, 
    alpha = .5, size = 3
  ) + 
  theme_minimal()
```


```{r}
g_tenure + scale_y_log10()
ggsave("Figures/FigureA2_MinisterTenure.pdf", width = 8, height = 6)
```



Average tenure length by decades

```{r}
spell_full |> 
  group_by(decade) |> 
  summarise(avg_tenure = median(time))
```

## Descriptive Statistics (Table A1)

Code below produces a table that shows descriptive statistics for covariates that are measured annually.

```{r}
covs_year <- df |> 
  select(iso3c, year, 
         womenrep_lag, leadgender, presidential, unified_aut, xm_qudsest, 
         inflate_lag, loggdppc_lag, growth_lag, tradepergdp_i_lag, kaopen_i_lag) |> 
  group_by(iso3c, year) |> 
  summarise_all(median, na.rm = TRUE)
```

```{r, message = FALSE, echo = FALSE, eval = FALSE}
stargazer(covs_year |> select(-iso3c, -year) |> as.data.frame(), 
          type = "text", 
          digits = 1, 
          covariate.labels = c("Women in Parliament", 
                               "Woman Head of Government", 
                               "Presidential System", 
                               "Unified Government", 
                               "Democracy Score", 
                               "Inflation", 
                               "Per capita GDP (logged)", 
                               "Economic Growth", 
                               "Trade Openness", 
                               "Capital Openness"))
```


```{r, message = FALSE, results = 'asis'}
stargazer(covs_year |> select(-iso3c, -year) |> as.data.frame(), 
          type = "html", 
          digits = 1,
          covariate.labels = c("Women in Parliament", 
                               "Woman Head of Government", 
                               "Presidential System", 
                               "Unified Government", 
                               "Democracy Score", 
                               "Inflation", 
                               "Per capita GDP (logged)", 
                               "Economic Growth", 
                               "Trade Openness", 
                               "Capital Openness"), 
          title = "Table A1: Descriptive Statistics for Annually Measured Covariates, 1972-2017",
          out = "Tables/tableA1.html")
```


## Gender Ratio during Crisis (Figure A1)

Crisis Measures

```{r}
df_crisis <- df |> 
  mutate(fm_gender_f = if_else(fm_gender == 1, "Woman", "Man"),
         crisis_f = if_else(crisis_mon == 1, "Crisis", "Not crisis"),
         crisis_f_n = if_else(crisis_mon == 1, 
                              "Crisis (n = 5,111)", 
                              "Not crisis (n = 95,401)"))
```


Banking Crisis (monthly)

```{r}
table(df_crisis $ crisis_f_n, useNA = "ifany")
```

```{r}
with(df_crisis, table(crisis_f, fm_gender_f, useNA = "ifany"))
```



- Time since last non-crisis (during crisis)
- Time since last crisis (during non-crisis)

```{r}
df_spell <- df_crisis |> 
  mutate(non_crisis_mon = if_else(crisis_mon == 1, 0, 1))
with(df_spell, table(crisis_mon, non_crisis_mon))
```

Calculate the number of months since...  
- last non-crisis month (= 1 during peace time, 1,2,3,... during crisis)  
- last crisis month (= 1 during crisis, 1,2,3,... during peace time)  


```{r}
spell1 <- pltesim::btscs(df_spell, "non_crisis_mon", "date", "iso3c")
spell1 <- spell1 |> rename(time_since_0 = spell_time)
spell2 <- pltesim::btscs(spell1, "crisis_mon", "date", "iso3c")
df_spell <- spell2 |> rename(time_since_1 = spell_time)
```



Calculate the "risk" of woman holding FM-ship during a crisis.

```{r}
fmrisk_0 <- df_spell |> 
  group_by(crisis_mon, fm_gender) |> 
  summarise(N = n()) |> 
  mutate(Ratio = round(N / sum(N), 4))
fmrisk_0
```

```{r}
fmrisk_t0 <- fmrisk_0 |> ungroup() |> 
  filter(crisis_mon == 0 & fm_gender == 1) |> 
  mutate(time_since_0 = 0) |> 
  select(time_since_0, ratio_woman = Ratio)
fmrisk_t0
```

- During a non-crisis time, 3.85% of ministers are women.  
- During a crisis time, 5.24% of ministers are women.  


At t = - 1

Calculate the "risk" at time t - 1 (one month before a crisis begins).

```{r}
fmrisk_t1 <- df_spell |> 
  arrange(iso3c, date) |> 
  group_by(iso3c) |> 
  mutate(crisis_next = lead(crisis_mon)) |> 
  filter(crisis_mon == 0 & crisis_next == 1)

fmrisk_t1 <- fmrisk_t1 |> group_by(fm_gender) |> 
  summarise(N = n()) |> 
  mutate(Ratio = round(N / sum(N), 4)) |> 
  filter(fm_gender == 1) |> 
  mutate(time_since_0 = -1) |>  
  select(time_since_0, ratio_woman = Ratio)
```



At t = 0

Calculate the "risk" of woman holding FM-ship during a crisis over time.


```{r}
fmrisk <- df_spell |> 
  filter(crisis_mon == 1) |> 
  group_by(time_since_0, fm_gender) |> 
  summarise(N = n()) |> 
  mutate(Ratio = round(N / sum(N), 4)) |> 
  filter(fm_gender == 1) |> 
  select(time_since_0, ratio_woman = Ratio) |> 
  ungroup()
```

Combine t=-1, t=0, and during crisis.

```{r}
fmrisk <- bind_rows(fmrisk_t1, fmrisk)
fmrisk
```


"Risk" of a woman holding a finance minister office during a crisis. 

```{r}
ggplot(data = fmrisk) + 
  geom_hline(yintercept = fmrisk_t0 $ ratio_woman * 100, linetype = 2) + 
  geom_line(aes(x = time_since_0, y = ratio_woman * 100)) + 
  scale_y_continuous(limits = c(0, 8), 
                     breaks = c(0, 2, 4, 6, 8), 
                     labels = c("0%", "2%", "4%", "6%", "8%")) + 
  scale_x_continuous(limits = c(-1, 60), 
                     breaks = c(0, 12, 24, 36, 48, 60),
                     labels = c("0", "1", "2", "3", "4", "5")) + 
  labs(x = "Years since the beginning of a banking crisis", 
       y = "Percentage of woman finance ministers",
       title = "") + 
  theme_minimal()

ggsave("Figures/FigureA1_GenderRatio.pdf", width = 8, height = 6)
```



# 2. Women's Initial Access


## Main Analysis

### Table 1

Estimate five models reported in Table 1.

```{r}
## Base model formula
base_fml <- formula(Surv(fwfm72_t0, fwfm72_t, fwfm72_d) ~  .)

## Estimate five models
wia_m1 <- coxph(update(base_fml, . ~ + crisis_mon), 
                data = df, cluster = iso3c, ties=c("efron"))
## + woman 
wia_m2 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + 
                          womenrep_lag + leadgender))
## + institution
wia_m3 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + 
                          presidential + xm_qudsest + unified_aut))
## + economic
wia_m4 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))
## full model
wia_m5 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))


## Put them together
model_list_tbl1 <- list(wia_m1, wia_m2, wia_m3, wia_m4, wia_m5)
## Add AIC
model_list_tbl1 <- lapply(model_list_tbl1, add_AIC)
```

Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tbl1,
          se = lapply(model_list_tbl1, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Women in Parliament",
                               "Woman Head of Government", 
                               "Presidential", 
                               "Democracy Score", 
                               "Unified Government",
                               "Economic Growth", 
                               "Inflation", 
                               "GDP per Capita", 
                               "Trade Openness", 
                               "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 1: Banking Crisis and Women's Initial Access to the Finance Ministry, 1972-2017",
          type = "text")
```


```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tbl1,
          se = lapply(model_list_tbl1, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Women in Parliament",
                               "Woman Head of Government", 
                               "Presidential", 
                               "Democracy Score", 
                               "Unified Government",
                               "Economic Growth", 
                               "Inflation", 
                               "GDP per Capita", 
                               "Trade Openness", 
                               "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 1: Banking Crisis and Women's Initial Access to the Finance Ministry, 1972-2017",
          type = "html",
          out = "Tables/table1.html")
```


We calculate N for each model. We first prepare smaller data sets for different models.

```{r, message = FALSE}
df_m1 <- df |> select(fail = fwfm72_d, crisis_mon, iso3c) |> na.omit()
df_m2 <- df |> select(fail = fwfm72_d, crisis_mon, iso3c, 
                      womenrep_lag, leadgender) |> na.omit()
df_m3 <- df |> select(fail = fwfm72_d, crisis_mon, iso3c, 
                      presidential, xm_qudsest, unified_aut) |> na.omit()
df_m4 <- df |> select(fail = fwfm72_d, crisis_mon, iso3c, 
                      growth_lag, inflate_lag, loggdppc_lag, 
                      tradepergdp_i_lag, kaopen_i_lag) |> na.omit()
df_m5 <- df |> select(fail = fwfm72_d, crisis_mon, iso3c, 
                      womenrep_lag, leadgender,
                      presidential, xm_qudsest, unified_aut,
                      growth_lag, inflate_lag, loggdppc_lag, 
                      tradepergdp_i_lag, kaopen_i_lag) |> na.omit()
```

```{r}
report_n <- function(df){
  data.frame(n = dim(df)[1], 
             n_cntry = length(unique(df $ iso3c)),
             n_fail = sum(df $ fail == 1))
}
res_n <- sapply(list(df_m1, df_m2, df_m3, df_m4, df_m5), report_n)
res_n <- unlist(res_n) |> matrix(byrow=F, ncol=5) |> data.frame()
rownames(res_n) <- c("N. Obs", "N. Country", "N. Failures")
colnames(res_n) <- c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5")
kable(res_n)
```




### Figure 1

We will plot exponentiated coefficients (Hazard ratio).

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tbl1,
          apply.coef = exp, # Report exponentiated coef = hazard ratio
          p.auto = FALSE,   # necessary to stop reporting misleading p values
          se = lapply(model_list_tbl1, get_clse_or),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", "Women in Parliament",
                               "Woman HoG", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Ratio (> 1 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017",
          type = "text")
```


```{r}
df_hr <- data.frame(pe = NA, low = NA, up = NA)
for (i in 1:length(model_list_tbl1)){
  df_hr[i, ] <- extract_hr(model_list_tbl1[[i]])
}

df_hr <- df_hr |> 
  mutate(model = c("Model 1: Bivariate\n199 countries\n75 women", 
                   "Model 2: + Woman\n182 countries\n65 women", 
                   "Model 3: + Institution\n164 countries\n63 women", 
                   "Model 4: + Economic\n169 countries\n64 women", 
                   "Model 5: Full\n150 countries\n55 women"), 
         mnum = seq(5, 1))
```


```{r}
ggplot(data = df_hr, 
       mapping = aes(x = pe, y = reorder(model, mnum))) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_segment(aes(x = low, xend = up, yend = reorder(model, mnum))) + 
  geom_label(aes(label = round(pe, 2))) + 
  scale_x_continuous(breaks = c(1,3,5,7)) + 
  labs(x = "Hazard ratio for banking crisis", y = NULL,
       title = "") + 
  theme_minimal() + 
  theme(axis.text=element_text(size=12))
ggsave("Figures/Figure2_InitialAccess.pdf", width = 8, height = 5)
```


## Testing non-proportionality

We follow Park and Hendry's (2015) approach to test the non-proportionality using different transformations of time.

### Table B10

Model 1

```{r}
zphTable(wia_m1, digits = 3)
```

Model 2

```{r}
zphTable(wia_m2, digits = 3)
```

Model 3

```{r}
zphTable(wia_m3, digits = 3)
```

Model 4

```{r}
zphTable(wia_m4, digits = 3)
```


Model 5

```{r}
zphTable(wia_m5, digits = 3)
```


Summary of Results

```{r}
zph_m1 <- zphTable(wia_m1, digits = 2)[, c(3, 6)]
zph_m2 <- zphTable(wia_m2, digits = 2)[, c(3, 6)]
zph_m3 <- zphTable(wia_m3, digits = 2)[, c(3, 6)]
zph_m4 <- zphTable(wia_m4, digits = 2)[, c(3, 6)]
zph_m5 <- zphTable(wia_m5, digits = 2)[, c(3, 6)]

zphdf_m1 <- data.frame(x = rownames(zph_m1), km.p1 = zph_m1[,1], rank.p1 = zph_m1[,2])
zphdf_m2 <- data.frame(x = rownames(zph_m2), km.p2 = zph_m2[,1], rank.p2 = zph_m2[,2])
zphdf_m3 <- data.frame(x = rownames(zph_m3), km.p3 = zph_m3[,1], rank.p3 = zph_m3[,2])
zphdf_m4 <- data.frame(x = rownames(zph_m4), km.p4 = zph_m4[,1], rank.p4 = zph_m4[,2])
zphdf_m5 <- data.frame(x = rownames(zph_m5), km.p5 = zph_m5[,1], rank.p5 = zph_m5[,2])

zphdf_sum <- full_join(zphdf_m1, zphdf_m2)
zphdf_sum <- full_join(zphdf_sum, zphdf_m3)
zphdf_sum <- full_join(zphdf_sum, zphdf_m4)
zphdf_sum <- full_join(zphdf_sum, zphdf_m5)
xid <- which(zphdf_sum $ x == "GLOBAL")
zphdf_sum <- rbind(zphdf_sum[-xid, ], zphdf_sum[xid, ])
zphdf_sum $ x <- c("Banking Crisis", "Women in Parliament",
                   "Woman Head of Government", "Presidential", 
                   "Democracy Score", "Unified Government",
                   "Economic Growth", "Inflation", "GDP per Capita", 
                   "Trade Openness", "Capital Openness", "Global")
```

Summary table

```{r}
kable(zphdf_sum, row.names = FALSE) |> 
  kable_classic(full_width = FALSE) |> 
  add_header_above(c(" " = 1, "Model 1" = 2, "Model 2" = 2, "Model 3" = 2,
                     "Model 4" = 2, "Model 5" = 2))

kable(zphdf_sum, row.names = FALSE) |> 
  kable_classic(full_width = FALSE) |> 
  add_header_above(c(" " = 1, "Model 1" = 2, "Model 2" = 2, "Model 3" = 2,
                     "Model 4" = 2, "Model 5" = 2)) |> 
  save_kable(file = "Tables/tableB10.html", self_contained = TRUE)
```


### Table B11

Global test suggest that none of the model exhibits violation of the PH assumption.  

That being said, note  
- for Trade Openness, KM and rank test suggests a possible violation in Model 4.  
- for Inflation, KM test suggests a possible violation in Model 5.  

We will thus relax the PH assumption with respect to these variables for models 4 and 5.  


For Model 4: Tests with Kaplan-Meier transformation and rank transformation both suggest Trade Openness may exhibit non-proportionality. We estimate models that include an interaction term between Trade Openness and some function of time.


```{r}
## f(t) = t
wia_m4nph1 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              growth_lag + inflate_lag + loggdppc_lag + 
                              tradepergdp_i_lag + tt(tradepergdp_i_lag) + 
                              kaopen_i_lag),
                     tt = function(x, t, ...) x * t)
## f(t) = log(t)
wia_m4nph2 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              growth_lag + inflate_lag + loggdppc_lag + 
                              tradepergdp_i_lag + tt(tradepergdp_i_lag) + 
                              kaopen_i_lag),
                     tt = function(x, t, ...) x * log(t))
## f(t) = sqrt(t)
wia_m4nph3 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              growth_lag + inflate_lag + loggdppc_lag + 
                              tradepergdp_i_lag + tt(tradepergdp_i_lag) + 
                              kaopen_i_lag),
                     tt = function(x, t, ...) x * sqrt(t))
model_list_m4_nph <- list(wia_m4, wia_m4nph1, wia_m4nph2, wia_m4nph3)
```


```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m4_nph,
          se = lapply(model_list_m4_nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness",
                               "Trade Openness x f(time)",
                               "Capital Openness"),
          dep.var.caption = c("Hazard Ratio (> 1 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017",
          column.labels = c("Model 4","Identity", "Log", "Sqrt"),
          type = "text")
```


Compare model fit by AIC. 

```{r}
lapply(model_list_m4_nph, extractAIC)
```

Model with identity (linear time) fits the data better.



For Model 5: KM test suggests inflation may exhibit non-proportionality. We estimate models that include an interaction term between inflation and some function of time.

```{r}
## f(t) = t
wia_m5nph1 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              womenrep_lag + leadgender + 
                              presidential + xm_qudsest + unified_aut + 
                              growth_lag + 
                              inflate_lag + tt(inflate_lag) +  
                              loggdppc_lag + tradepergdp_i_lag + kaopen_i_lag), 
                     tt = function(x, t, ...) x * t)
## f(t) = log(t)
wia_m5nph2 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              womenrep_lag + leadgender + 
                              presidential + xm_qudsest + unified_aut + 
                              growth_lag + 
                              inflate_lag + tt(inflate_lag) +  
                              loggdppc_lag + tradepergdp_i_lag + kaopen_i_lag), 
                     tt = function(x, t, ...) x * log(t))
## f(t) = sqrt(t)
wia_m5nph3 <- update(wia_m1, 
                     update(base_fml, . ~ crisis_mon + 
                              womenrep_lag + leadgender + 
                              presidential + xm_qudsest + unified_aut + 
                              growth_lag + 
                              inflate_lag + tt(inflate_lag) +  
                              loggdppc_lag + tradepergdp_i_lag + kaopen_i_lag), 
                     tt = function(x, t, ...) x * sqrt(t))

model_list_m5nph <- list(wia_m5, wia_m5nph1, wia_m5nph2, wia_m5nph3)
```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m5nph,
          se = lapply(model_list_m5nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017",
          column.labels = c("Model 5", "Identity", "Log", "Sqrt"),
          type = "text")
```


```{r}
lapply(model_list_m5nph, extractAIC)
```

Model with log(time) fits the data better.

```{r}
model_list_m45 <- list(wia_m4, wia_m4nph1, wia_m5, wia_m5nph2)
model_list_m45 <- lapply(model_list_m45, add_AIC)
```

```{r warning=FALSE, results = 'asis'}
stargazer(model_list_m45,
          se = lapply(model_list_m45, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          column.labels = c("Model 4", "NPH", "Model 5", "NPH"),
          covariate.labels = c("Crisis", "Women in Parliament",
                               "Woman HoG", "Presidential", 
                               "Democracy Score", "Unified",
                               "Economic Growth", "Inflation", "Inflation x f(time)",
                               "GDP per Capita", 
                               "Trade Openness", "Trade Openness x f(time)",
                               "Capital Openness"),
          type = "html", 
          title = "Table B11: Banking Crisis and Women's Initial Access to the Finance Ministry, 1972-2017 -- Relaxing the PH Assumption",
          out = "Tables/tableB11.html")
```


## Corporate Quotas

Estimate models that include corporate quotas as an additional control variable.


```{r}
wia_m5_q1 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + quota + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))
wia_m5_q2 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + hard_quota + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))
wia_m5_q3 <- update(wia_m1, 
                 update(base_fml, . ~ crisis_mon + comp_hard_quota + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))

model_list_tblB1 <- list(wia_m5, wia_m5_q1, wia_m5_q2, wia_m5_q3)
## Add AIC
model_list_tblB1 <- lapply(model_list_tblB1, add_AIC)
```

### Table B1

Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tblB1,
          se = lapply(model_list_tblB1, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Corporate Quota",
                               "Hard Quota",
                               "Comp Hard Quota",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017: Corporate Quota",
          type = "text")
```

```{r, results = 'asis', warning = FALSE}
stargazer(model_list_tblB1,
          se = lapply(model_list_tblB1, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Corporate Quota",
                               "Hard Quota",
                               "Comp Hard Quota",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B1: Women’s Initial Access to FM, 1972-2017: Controlling for Corporate Quota",
          type = "html",
          out = "Tables/tableB1.html")
```


## Starting Time

We estimate models using alternative starting times (1946 and 1966). 

```{r}
## 1946 as starting
s1946_fml <- formula(Surv(fwfm46_t0, fwfm46_t, fwfm46_d) ~  .)
## 1966 as starting
s1966_fml <- formula(Surv(fwfm66_t0, fwfm66_t, fwfm66_d) ~  .)

## Estimate 46 and 66
wia_m5_46 <- coxph(update(s1946_fml, . ~ crisis_mon + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag), 
                data = df, cluster = iso3c, ties=c("efron"))
wia_m5_66 <- coxph(update(s1966_fml, . ~ crisis_mon + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag), 
                data = df, cluster = iso3c, ties=c("efron"))
model_list_tblB2 <- list(wia_m5, wia_m5_66, wia_m5_46)
## Add AIC
model_list_tblB2 <- lapply(model_list_tblB2, add_AIC)

```

### Table B2

Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tblB2,
          se = lapply(model_list_tblB2, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017",
          type = "text")
```


```{r, results = 'asis', warning = FALSE}
stargazer(model_list_tblB2,
          se = lapply(model_list_tblB2, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B2: Women’s Initial Access to FM: Alternative Starting Time",
          type = "html",
          out = "Tables/tableB2.html")
```




## Alternative Methods for Ties

We used Efron method in our main model. We will see if the results are robust to an alternative method (Breslow's method).

```{r}
cox_br1 <- update(wia_m1, ties = c("breslow"))
cox_br2 <- update(wia_m2, ties = c("breslow"))
cox_br3 <- update(wia_m3, ties = c("breslow"))
cox_br4 <- update(wia_m4, ties = c("breslow"))
cox_br5 <- update(wia_m5, ties = c("breslow"))

model_list_tblB3 <- list(cox_br1, cox_br2, cox_br3, cox_br4, cox_br5)
## Add AIC
model_list_tblB3 <- lapply(model_list_tblB3, add_AIC)

```

### Table B3

Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tblB3,
          se = lapply(model_list_tblB3, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Women's first access to FM, 1972-2017: Breslow's method",
          type = "text")
```

```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tblB3,
          se = lapply(model_list_tblB3, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", "Women in Parliament",
                               "Woman Head of Government", "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B3: Women’s Initial Access to FM, 1972-2017: Breslow’s Method",
          type = "html",
          out = "Tables/tableB3.html")
```


### Figure B1

```{r}
df_hr_br <- data.frame(pe = NA, low = NA, up = NA)
for (i in 1:length(model_list_tblB3)){
  df_hr_br[i, ] <- extract_hr(model_list_tblB3[[i]])
}

df_hr_br <- df_hr_br |> 
  mutate(model = c("Model 1: Bivariate\n199 countries\n75 women", 
                   "Model 2: + Woman\n182 countries\n65 women", 
                   "Model 3: + Institution\n164 countries\n63 women", 
                   "Model 4: + Economic\n169 countries\n64 women", 
                   "Model 5: Full\n150 countries\n55 women"), 
         mnum = seq(5, 1), 
         tie = "Breslow")
df_hr $ tie <- "Efron"
df_hr_tie <- bind_rows(df_hr, df_hr_br)

df_hr == df_hr_br
```



```{r}
ggplot(data = df_hr_tie, 
            mapping = aes(x = pe, xmin = low, xmax = up, 
                          y = reorder(model, mnum), color = tie)) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  labs(x = "Effect of banking crisis on survival time until first appointment", 
       y = NULL, 
       color = "Methods for\nHandling Ties") + 
  theme(legend.position = "top") + 
  scale_shape_manual(values = c(16,1)) + 
  scale_color_manual(values = c("black", "gray")) + 
  scale_x_continuous(breaks = c(1,3,5, 7)) + 
  guides(color = guide_legend(reverse = TRUE)) + 
  theme_minimal() + 
  theme(axis.text=element_text(size=12))
ggsave("Figures/FigureB1_Ties.pdf", width = 8, height = 5)
```


## Various Indicators of Crisis

Currency Crisis

```{r}
## Calendar year
cc_tc <- update(wia_m5, 
                update(base_fml, . ~ currency_mon0 + 
                          womenrep_lag + leadgender + 
                          presidential + xm_qudsest + unified_aut + 
                          growth_lag + inflate_lag + loggdppc_lag + 
                          tradepergdp_i_lag + kaopen_i_lag))
```

Inflation crisis

```{r}
fml_inf <- formula(Surv(fwfm72_t0, fwfm72_t, fwfm72_d) ~  womenrep_lag + leadgender + 
                     presidential + xm_qudsest + unified_aut + 
                     growth_lag + loggdppc_lag + 
                     tradepergdp_i_lag + kaopen_i_lag)
cox_i0 <- update(wia_m1, update(fml_inf, . ~ crisis_mon + .), 
                 data = df |> filter(!is.na(crisis20)))
cox_i1 <- update(wia_m1, update(fml_inf, . ~ crisis20 + .), data = df)
```


Unemployment crisis

As the data are available only for 1990-2017, we will compare this against 1990 data.


```{r}
cox_u0 <- update(wia_m1, update(fml_inf, . ~ crisis_mon + .), 
                 data = df |> filter(!is.na(cri_avgsd5_roll)))
cox_u1 <- update(wia_m1, update(fml_inf, . ~ cri_avgsd5_roll + .), data = df)
```


### Table B4

Combine baseline results and currency, inflation, and unemployment crises models.

```{r}
model_list_tblB4 <- list(wia_m5, cc_tc, cox_i0, cox_i1, cox_u0, cox_u1)
## Add AIC
model_list_tblB4 <- lapply(model_list_tblB4, add_AIC)
```


```{r, results = 'asis', warning = FALSE}
stargazer(model_list_tblB4,
          se = lapply(model_list_tblB4, get_clse_hr),
          covariate.labels = c("Banking Crisis", 
                               "Currency Crisis (calendar year)",
                               "Inflation Crisis ( > 20%)",
                               "Unemployment Crisis (> 1 s.d.)",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B4: Women’s Initial Access: Various Indicators of Crisis, 1972-2017",
          type = "html",
          out = "Tables/tableB4.html")
```


We calculate N for each model. We first prepare smaller data sets for different models.

```{r, message = FALSE}
df_i0 <- df |> select(fail = fwfm72_d, crisis_mon, crisis20, iso3c, 
                      womenrep_lag, leadgender,
                      presidential, xm_qudsest, unified_aut,
                      growth_lag, loggdppc_lag, 
                      tradepergdp_i_lag, kaopen_i_lag) |> na.omit()
df_u0 <- df |> select(fail = fwfm72_d, crisis_mon, cri_avgsd5_roll, iso3c, 
                      womenrep_lag, leadgender,
                      presidential, xm_qudsest, unified_aut,
                      growth_lag, loggdppc_lag, 
                      tradepergdp_i_lag, kaopen_i_lag) |> na.omit()
```

```{r}
res_n <- sapply(list(df_m5, df_i0, df_u0), report_n)
res_n <- unlist(res_n) |> matrix(byrow = F, ncol = 3) |> data.frame()
rownames(res_n) <- c("N. Obs", "N. Country", "N. Failures")
colnames(res_n) <- c("Models 1-2", "Model3 3-4", "Models 5-6")
kable(res_n)
```



## Woman HoG (Historical)

We control for whether a country has ever had a woman head of government. 

Identify the year in which a country had a woman head of government for the first time.

```{r}
## Find first year with woman HoG
first_whog_year <- df |> 
  filter(leadgender == 1) |> 
  group_by(iso3c) |> 
  summarise(first_whog_year = min(year))
## Merge this and define "woman HoG ever"
yd_whog <- left_join(df, first_whog_year)
yd_whog $ first_whog_year[is.na(yd_whog $ first_whog_year)] <- 9999
## Those that had woman HoG in 1966-1971. 
yd_whog $ first_whog_year[yd_whog $ iso3c == "IND"] <- 1966
yd_whog $ first_whog_year[yd_whog $ iso3c == "ISR"] <- 1969
yd_whog $ first_whog_year[yd_whog $ iso3c == "LKA"] <- 1971
yd_whog <- yd_whog |> 
  mutate(whog_ever = if_else(year >= first_whog_year, 1, 0),
         crisis_whoge = if_else(whog_ever == 0 & crisis_mon == 0, "whog=0, cr=0", "NA"),
         crisis_whoge = if_else(whog_ever == 0 & crisis_mon == 1, "whog=0, cr=1", crisis_whoge),
         crisis_whoge = if_else(whog_ever == 1 & crisis_mon == 0, "whog=1, cr=0", crisis_whoge),
         crisis_whoge = if_else(whog_ever == 1 & crisis_mon == 1, "whog=1, cr=1", crisis_whoge),
         crisis_whogeAlt = if_else(crisis_whoge == "whog=1, cr=1", "0whog=1, cr=1", crisis_whoge))
# with(yd_whog, table(crisis_mon, whog_ever, useNA = "ifany"))
# with(yd_whog, table(crisis_whoge, useNA = "ifany"))
# with(yd_whog, table(leadgender, whog_ever, useNA = "ifany"))
# with(yd_whog, table(crisis_whoge, crisis_whogeAlt, useNA = "ifany"))
```

Estimate models that control for woman head of government variables.

```{r}
rhs_cov <- formula(Surv(fwfm72_t0, fwfm72_t, fwfm72_d) ~  womenrep_lag + leadgender + 
                     presidential + xm_qudsest + unified_aut + 
                     growth_lag + inflate_lag + loggdppc_lag + 
                     tradepergdp_i_lag + kaopen_i_lag)

## Include binary woman HoG ever
wia_m15 <- update(wia_m1, update(rhs_cov, . ~ crisis_mon + whog_ever + .), data = yd_whog)
## Interact with binary woman HoG ever
wia_m16 <- update(wia_m1, update(rhs_cov, . ~ crisis_mon * whog_ever + .), data = yd_whog)
## Alternative specifications (same models, different representation)
## We need these to obtain p-values for interaction terms.
wia_m16a <- update(wia_m1, update(rhs_cov, . ~ crisis_whoge + .), data = yd_whog)
wia_m16b <- update(wia_m1, update(rhs_cov, . ~ crisis_whogeAlt + .), data = yd_whog)

## Put together results
model_list_tblB5c <- list(wia_m5, wia_m15, wia_m16, wia_m16a, wia_m16b)
## Add AIC
model_list_tblB5c <- lapply(model_list_tblB5c, add_AIC)
```

Check results

```{r, warning = FALSE, echo = FALSE, eval = FALSE}
stargazer(model_list_tblB5c,
          se = lapply(model_list_tblB5c, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Interact Crisis x Woman HoG ever",
          type = "text")
```


### Table B5

```{r, warning = FALSE, results = 'asis'}
model_list_tblB5 <- list(wia_m5, wia_m15, wia_m16a)
## Add AIC
model_list_tblB5 <- lapply(model_list_tblB5, add_AIC)

stargazer(model_list_tblB5,
          se = lapply(model_list_tblB5, get_clse_hr),
          covariate.labels = c("Banking Crisis", 
                               "Had Woman HoG Ever", 
                               "WHoGE = 0, Crisis = 1", 
                               "WHoGE = 1, Crisis = 0",
                               "WHoGE = 1, Crisis = 1",
                               "Women in Parliament",
                               "Woman Head of Government", 
                               "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B5: Conditional Effect of Banking Crisis: Woman HoG (Historical), 1972-2017",
          type = "html", 
          out = "Tables/tableB5.html")
```

Obtain p-values using the models with alternative specifications.

```{r, warning = FALSE, echo = FALSE, eval = FALSE}
model_list_16p <- list(wia_m16a, wia_m16b)
stargazer(model_list_16p,
          se = lapply(model_list_16p, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          keep = c("crisis_whoge*"),
          report = ("vc*p"),
          type = "text")
```

### Figure B2

```{r}
df_hr <- summary(wia_m16a) $ conf.int[c("crisis_whogewhog=0, cr=1", 
                                       "crisis_whogewhog=1, cr=0", 
                                       "crisis_whogewhog=1, cr=1"),] |> 
  as.data.frame() |> 
  select(hr = "exp(coef)", low = "lower .95", up = "upper .95") |> 
  mutate(womanHoG = c("No", "Yes", "Yes"),
         crisis = c("Crisis = 1", "Crisis = 0", "Crisis = 1"), 
         gc = c("No woman HoG ever\nCrisis", 
                "Woman HoG before\nNo Crisis", "Woman HoG before\nCrisis"),
         signif = if_else((low > 1 & up > 1) | (low < 1 & up < 1), "Yes", "No"),
         mnum = c(3,2,1))
df_hr
```

```{r}
ggplot(data = df_hr, 
       mapping = aes(x = hr, y = reorder(gc, mnum))) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_segment(aes(x = low, xend = up, yend = reorder(gc, mnum),
                   color = signif), size = 1.5) + 
  geom_label(aes(label = round(hr, 2), color = signif)) + 
  xlim(0, 15) +
  scale_color_grey(start = 0.6, end = 0) + 
  guides(color = "none") + 
  labs(y = "", x = "Hazard ratio (no woman HoG & no crisis as baseline)") + 
  theme_minimal() + 
  geom_segment(x = 6, xend = 6, y = 1, yend = 2) +
  geom_segment(x = 5.5, xend = 6, y = 1, yend = 1) +
  geom_segment(x = 5.5, xend = 6, y = 2, yend = 2) +
  geom_text(x = 5,  y = 1.5, label = "p = 0.06", angle = 0) + 
  geom_segment(x = 14, xend = 14, y = 1, yend = 3) +
  geom_segment(x = 13.5, xend = 14, y = 1, yend = 1) +
  geom_segment(x = 13.5, xend = 14, y = 3, yend = 3) +
  geom_text(x = 12.8,  y = 2, label = "p = 0.006", angle = 0) + 
  theme(axis.text=element_text(size=10))
ggsave("Figures/FigureB2_WHoGhistorical.pdf", width = 8, height = 5)
```

## Current Woman HoG 

We control for whether a country currently has a woman head of government. 

```{r}
yd_whog <- yd_whog |> 
  mutate(crisis_whogc = if_else(leadgender == 0 & crisis_mon == 0, "whog=0, cr=0", "NA"),
         crisis_whogc = if_else(leadgender == 0 & crisis_mon == 1, "whog=0, cr=1", crisis_whogc),
         crisis_whogc = if_else(leadgender == 1 & crisis_mon == 0, "whog=1, cr=0", crisis_whogc),
         crisis_whogc = if_else(leadgender == 1 & crisis_mon == 1, "whog=1, cr=1", crisis_whogc),
         crisis_whogcAlt = if_else(crisis_whogc == "whog=1, cr=1", "0whog=1, cr=1", crisis_whogc))
# with(yd_whog, table(crisis_whogc, useNA = "ifany"))
# with(yd_whog, table(crisis_whogc, crisis_mon, useNA = "ifany"))
# with(yd_whog, table(crisis_whogc, leadgender, useNA = "ifany"))
# with(yd_whog, table(crisis_mon, leadgender, useNA = "ifany"))

rhs_cov <- formula(Surv(fwfm72_t0, fwfm72_t, fwfm72_d) ~  womenrep_lag + 
                     presidential + xm_qudsest + unified_aut + 
                     growth_lag + inflate_lag + loggdppc_lag + 
                     tradepergdp_i_lag + kaopen_i_lag)

## Interact with binary woman HoG
wia_m17 <- update(wia_m1, update(rhs_cov, . ~ crisis_mon * leadgender + .), data = yd_whog)
wia_m17a <- update(wia_m1, update(rhs_cov, . ~ crisis_whogc + .), data = yd_whog)
wia_m17b <- update(wia_m1, update(rhs_cov, . ~ crisis_whogcAlt + .), data = yd_whog)

model_list_tblB6c <- list(wia_m5, wia_m17, wia_m17a, wia_m17b)
## Add AIC
model_list_tblB6c <- lapply(model_list_tblB6c, add_AIC)
```

Check results

```{r, warning = FALSE, echo = FALSE, eval = FALSE}
stargazer(model_list_tblB6c,
          se = lapply(model_list_tblB6c, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Interact Crisis x Woman HoG ever",
          type = "text")
```


### Table B6

```{r, warning = FALSE, results = 'asis'}
model_list_tblB6 <- list(wia_m5, wia_m17a)
## Add AIC
model_list_tblB6 <- lapply(model_list_tblB6, add_AIC)

stargazer(model_list_tblB6,
          se = lapply(model_list_tblB6, get_clse_hr),
          covariate.labels = c("Banking Crisis", 
                               "Man HoG, Crisis = 1", 
                               "Woman HoG, Crisis = 0",
                               "Woman HoG, Crisis = 1",
                               "Women in Parliament",
                               "Woman Head of Government", 
                               "Presidential", 
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita", 
                               "Trade Openness", "Capital Openness"),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B6: Conditional Effect of Banking Crisis: Woman HoG (Current), 1972-2017",
          type = "html", 
          out = "Tables/tableB6.html")
```

Obtain p-values

```{r, warning = FALSE, echo = FALSE, eval = FALSE}
model_list_17p <- list(wia_m17a, wia_m17b)
stargazer(model_list_17p,
          se = lapply(model_list_17p, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          keep = c("crisis_whogc*"),
          report = ("vc*p"),
          type = "text")
```

### Figure B3

```{r}
df_hr <- summary(wia_m17a) $ conf.int[c("crisis_whogcwhog=0, cr=1", 
                                       "crisis_whogcwhog=1, cr=0", 
                                       "crisis_whogcwhog=1, cr=1"),] |> 
  as.data.frame() |> 
  select(hr = "exp(coef)", low = "lower .95", up = "upper .95") |> 
  mutate(womanHoG = c("No", "Yes", "Yes"),
         crisis = c("Crisis = 1", "Crisis = 0", "Crisis = 1"), 
         gc = c("Man HoG\nCrisis", 
                "Woman HoG\nNo Crisis", "Woman HoG\nCrisis"),
         signif = if_else((low > 1 & up > 1) | (low < 1 & up < 1), "Yes", "No"),
         mnum = c(3,2,1))
df_hr
```

```{r}
ggplot(data = df_hr, 
       mapping = aes(x = hr, y = reorder(gc, mnum))) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_segment(aes(x = low, xend = up, yend = reorder(gc, mnum),
                   color = signif), size = 1.5) + 
  geom_label(aes(label = round(hr, 2), color = signif)) + 
  xlim(0, 12.5) +
  scale_color_grey(start = 0.6, end = 0) + 
  guides(color = "none") + 
  labs(y = "", x = "Hazard ratio (Man HoG & no crisis as baseline)") + 
  theme_minimal() + 
  geom_segment(x = 10, xend = 10, y = 1, yend = 2) +
  geom_segment(x = 9.5, xend = 10, y = 1, yend = 1) +
  geom_segment(x = 9.5, xend = 10, y = 2, yend = 2) +
  geom_text(x = 9,  y = 1.5, label = "p = 0.21", angle = 0) + 
  geom_segment(x = 12, xend = 12, y = 1, yend = 3) +
  geom_segment(x = 11.5, xend = 12, y = 1, yend = 1) +
  geom_segment(x = 11.5, xend = 12, y = 3, yend = 3) +
  geom_text(x = 11,  y = 2.5, label = "p = 0.09", angle = 0) + 
  theme(axis.text=element_text(size=10))
ggsave("Figures/FigureB3_WHoGcurrent.pdf", width = 8, height = 5)
```




# 3. Finance Minister Tenure


## Main Analysis

### Table 2

Estimate six models

```{r}
## Base model formula
fml_tenure <- formula(Surv(tenure_t0, tenure_t, tenure_d) ~ . )
fml_tenure_cov <- formula(Surv(tenure_t0, tenure_t, tenure_d) ~ womenrep_lag + leadgender + 
                                  presidential + xm_qudsest + unified_aut + 
                                  growth_lag + inflate_lag + loggdppc_lag + 
                                  tradepergdp_i_lag + kaopen_i_lag)

df_m1 <- df |> filter(tenure_st == 1 & !is.na(crisis_mon))

## Estimate six models
fmt_m1 <- coxph(update(fml_tenure, . ~ crisis_mon), 
                data = df, cluster = spellid, ties = c("efron"))
fmt_m2 <- update(fmt_m1, update(fml_tenure, . ~ fm_gender))
fmt_m3 <- update(fmt_m1, update(fml_tenure, . ~ crisis_mon + fm_gender))
fmt_m4 <- update(fmt_m1, update(fml_tenure, . ~ crisis_mon + fm_gender + gender_crisis))
fmt_m5 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + .))
fmt_m6 <- update(fmt_m5, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + gender_crisis + .))

## Put them together
model_list_tbl2 <- list(fmt_m1, fmt_m2, fmt_m3, fmt_m4, fmt_m5, fmt_m6)
## Add AIC
model_list_tbl2 <- lapply(model_list_tbl2, add_AIC)
```


Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tbl2,
          se = lapply(model_list_tbl2, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Woman Minister",
                               "Crisis x Woman",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```

```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tbl2,
          se = lapply(model_list_tbl2, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Woman Minister",
                               "Crisis x Woman",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2: Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "html",
          out = "Tables/table2.html")
```


Calculate Ns

Models 1-4

```{r}
calc_n_1 <- df |> 
  filter(tenure_st == 1) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens),
            fm_gender = median(fm_gender)
            )
with(calc_n_1, table(fail, right_cens_max))
```

```{r}
with(calc_n_1, table(fm_gender, useNA = "ifany"))
```





Models 5-6

```{r}
calc_n_2 <- df |> filter(tenure_st == 1 & !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(inflate_lag) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens),
            fm_gender = median(fm_gender)
            )

table(calc_n_2 $ fail)
```

```{r}
with(calc_n_2, table(fm_gender, useNA = "ifany"))
```

### Figure 3

Plot for Models 1 and 5

```{r}
model_list_15 <- list(fmt_m1, fmt_m5)
df_hr <- data.frame(pe = NA, low = NA, up = NA)

for (i in 1:length(model_list_15)){
  df_hr[i, ] <- extract_hr(model_list_15[[i]])
}

df_hr <- df_hr |> 
  mutate(model = c("Model 1: Bivariate\n3,257 minister-\ntenures", 
                   "Model 5: Full\n2,287 minister-\ntenures"), 
         mnum = seq(2, 1))
```


```{r}
ggplot(data = df_hr, 
       mapping = aes(x = pe, y = reorder(model, mnum))) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_segment(aes(x = low, xend = up, yend = reorder(model, mnum))) + 
  geom_label(aes(label = round(pe, 2))) + 
  scale_x_continuous(breaks = c(1,1.5, 2)) + 
  labs(x = "Hazard ratio for banking crisis", y = NULL,
       title = "") + 
  theme_minimal() + 
  theme(axis.text=element_text(size=12))
ggsave("Figures/Figure3_Tenure.pdf", width = 8, height = 4)
```




## Testing non-proportionality

### Table B12

We follow Park and Hendry's (2015) approach to test the non-proportionality using different transformations of time.

Model 1

```{r}
zphTable(fmt_m1, digits = 3)
```

Model 2

```{r}
zphTable(fmt_m2, digits = 2)
```

Model 3

```{r}
zphTable(fmt_m3, digits = 3)
```

Model 4

```{r}
zphTable(fmt_m4, digits = 3)
```


Model 5

```{r}
zphTable(fmt_m5, digits = 3)
```

Model 6

```{r}
zphTable(fmt_m6, digits = 3)
```


Summary of Results

```{r}
zph_m1 <- zphTable(fmt_m1, digits = 3)[, c(3, 6)]
zph_m2 <- zphTable(fmt_m2, digits = 2)[, c(3, 6)]
zph_m3 <- zphTable(fmt_m3, digits = 3)[, c(3, 6)]
zph_m4 <- zphTable(fmt_m4, digits = 3)[, c(3, 6)]
zph_m5 <- zphTable(fmt_m5, digits = 3)[, c(3, 6)]
zph_m6 <- zphTable(fmt_m6, digits = 3)[, c(3, 6)]

zphdf_m1 <- data.frame(x = rownames(zph_m1), km.p1 = zph_m1[,1], rank.p1 = zph_m1[,2])
zphdf_m2 <- data.frame(x = rownames(zph_m2), km.p2 = zph_m2[,1], rank.p2 = zph_m2[,2])
zphdf_m3 <- data.frame(x = rownames(zph_m3), km.p3 = zph_m3[,1], rank.p3 = zph_m3[,2])
zphdf_m4 <- data.frame(x = rownames(zph_m4), km.p4 = zph_m4[,1], rank.p4 = zph_m4[,2])
zphdf_m5 <- data.frame(x = rownames(zph_m5), km.p5 = zph_m5[,1], rank.p5 = zph_m5[,2])
zphdf_m6 <- data.frame(x = rownames(zph_m6), km.p6 = zph_m6[,1], rank.p6 = zph_m6[,2])

zphdf_sum <- full_join(zphdf_m1, zphdf_m2)
zphdf_sum <- full_join(zphdf_sum, zphdf_m3)
zphdf_sum <- full_join(zphdf_sum, zphdf_m4)
zphdf_sum <- full_join(zphdf_sum, zphdf_m5)
zphdf_sum <- full_join(zphdf_sum, zphdf_m6)
xid <- which(zphdf_sum $ x == "GLOBAL")
zphdf_sum <- rbind(zphdf_sum[-xid, ], zphdf_sum[xid, ])
zphdf_sum $ x <- c("Banking Crisis", 
                   "Woman Minister",
                   "Crisis x Woman",
                   "Women in Parliament",
                   "Woman HoG", "Presidential", 
                   "Democracy Score", "Unified Government",
                   "Economic Growth", "Inflation", "GDP per Capita", 
                   "Trade Openness", "Capital Openness", "Global")
```

Summary table

```{r}
kable(zphdf_sum, row.names = FALSE) |> 
  kable_classic(full_width = FALSE) |> 
  add_header_above(c(" " = 1, "Model 1" = 2, "Model 2" = 2, "Model 3" = 2,
                     "Model 4" = 2, "Model 5" = 2, "Model 6" = 2))

kable(zphdf_sum, row.names = FALSE) |> 
  kable_classic(full_width = FALSE) |> 
  add_header_above(c(" " = 1, "Model 1" = 2, "Model 2" = 2, "Model 3" = 2,
                     "Model 4" = 2, "Model 5" = 2, "Model 6" = 2)) |> 
  save_kable(file = "Tables/tableB12.html", self_contained = TRUE)
```

Global test suggest that Models 1, 3, 4, 5, and 6 exhibits potential violations of the PH assumption. We thus estimate a series of models that relax the PH assumption. 


Model 1

```{r}
zphTable(fmt_m1, digits = 3)
```

Estimate models that include an interaction term between a function of time, $f(t)$, and the offending variable. We use three functional forms for $f(t)$ (linear time, log time, and square root of time), and choose the functional form that produces the best model fit. 

```{r}
## t
fmt_m1nph1 <- coxph(update(fml_tenure, . ~ crisis_mon + tt(crisis_mon)), 
                data = df, cluster = spellid, ties = c("efron"), 
                tt = function(x, t, ...) x * t)
## log(t)
fmt_m1nph2 <- coxph(update(fml_tenure, . ~ crisis_mon + tt(crisis_mon)), 
                data = df, cluster = spellid, ties = c("efron"), 
                tt = function(x, t, ...) x * log(t))
## sqrt(t)
fmt_m1nph3 <- coxph(update(fml_tenure, . ~ crisis_mon + tt(crisis_mon)), 
                data = df, cluster = spellid, ties = c("efron"), 
                tt = function(x, t, ...) x * sqrt(t))

model_list_m1nph <- list(fmt_m1, fmt_m1nph1, fmt_m1nph2, fmt_m1nph3)
model_list_m1nph <- lapply(model_list_m1nph, add_AIC)
```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m1nph,
          p.auto = FALSE,   # necessary to stop reporting misleading p values
          se = lapply(model_list_m1nph, get_clse_hr),
          keep.stat = c("aic", "ll", "n"),
          dep.var.caption = c("Hazard Ratio (> 1 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2 Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```

Comparing AIC, we conclude that square root of time produces the best model fit. 





Model 2

```{r}
zphTable(fmt_m2, digits = 2)
```


Model 3

```{r}
zphTable(fmt_m3, digits = 2)
```


```{r}
## t
fmt_m3nph1 <- update(fmt_m1, 
                    update(fml_tenure, . ~ crisis_mon + tt(crisis_mon) + fm_gender),
                    tt = function(x, t, ...) x * t)
## log(t)
fmt_m3nph2 <- update(fmt_m1, 
                    update(fml_tenure, . ~ crisis_mon + tt(crisis_mon) + fm_gender),
                    tt = function(x, t, ...) x * log(t))
## sqrt(t)
fmt_m3nph3 <- update(fmt_m1, 
                    update(fml_tenure, . ~ crisis_mon + tt(crisis_mon) + fm_gender),
                    tt = function(x, t, ...) x * sqrt(t))

model_list_m3nph <- list(fmt_m3, fmt_m3nph1, fmt_m3nph2, fmt_m3nph3)
model_list_m3nph <- lapply(model_list_m3nph, add_AIC)
```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m3nph,
          se = lapply(model_list_m3nph, get_clse_or),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Ratio (> 1 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2 Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```


Comparing AIC, we conclude that square root of time produces the best model fit. 





Model 4

```{r}
zphTable(fmt_m4, digits = 2)
```


```{r}
fmt_m4nph1 <- update(fmt_m1, update(fml_tenure, . ~ crisis_mon + fm_gender + gender_crisis + 
                                     tt(crisis_mon)),
                    tt = function(x, t, ...) x * t)
fmt_m4nph2 <- update(fmt_m1, update(fml_tenure, . ~ crisis_mon + fm_gender + gender_crisis + 
                                     tt(crisis_mon)),
                    tt = function(x, t, ...) x * log(t))
fmt_m4nph3 <- update(fmt_m1, update(fml_tenure, . ~ crisis_mon + fm_gender + gender_crisis + 
                                     tt(crisis_mon)),
                    tt = function(x, t, ...) x * sqrt(t))

model_list_m4nph <- list(fmt_m4, fmt_m4nph1, fmt_m4nph2, fmt_m4nph3)
model_list_m4nph <- lapply(model_list_m4nph, add_AIC)

```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m4nph,
          se = lapply(model_list_m4nph, get_clse_or),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Ratio (> 1 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2 Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```

Comparing AIC, we conclude that square root of time produces the best model fit. 


Model 5

```{r}
zphTable(fmt_m5, digits = 2)
```


```{r}
fmt_m5nph1 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * t)
fmt_m5nph2 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * log(t))
fmt_m5nph3 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * sqrt(t))
model_list_m5nph <- list(fmt_m5, fmt_m5nph1, fmt_m5nph2, fmt_m5nph3)
model_list_m5nph <- lapply(model_list_m5nph, add_AIC)
```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m5nph,
          se = lapply(model_list_m5nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2 Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```


Comparing AIC, we conclude that linear time produces the best model fit. 


Model 6

```{r}
zphTable(fmt_m6, digits = 2)
```


```{r}
fmt_m6nph1 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + gender_crisis + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * t)
fmt_m6nph2 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + gender_crisis + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * log(t))
fmt_m6nph3 <- update(fmt_m1, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + gender_crisis + . + 
                                     tt(womenrep_lag) + tt(xm_qudsest) + 
                                     tt(tradepergdp_i_lag)),
                    tt = function(x, t, ...) x * sqrt(t))

model_list_m6nph <- list(fmt_m6, fmt_m6nph1, fmt_m6nph2, fmt_m6nph3)
model_list_m6nph <- lapply(model_list_m6nph, add_AIC)
```

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_m6nph,
          p.auto = FALSE,   # necessary to stop reporting misleading p values
          se = lapply(model_list_m6nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table 2 Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```

Comparing AIC, we conclude that linear time produces the best model fit. 



Summarize the results

```{r}
model_list_nph <- list(fmt_m1nph3, fmt_m3nph3, fmt_m4nph3, fmt_m5nph1, fmt_m6nph1)
model_list_nph <- lapply(model_list_nph, add_AIC)
```


```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_nph,
          se = lapply(model_list_nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B13 Tenure Models: Relaxing PH Assumption",
          order = c(1:5, 15, 6:8, 16, 9:13, 17, 14),
          covariate.labels = c("Banking Crisis", "Banking Crisis x f(time)",
                               "Woman Minister", "Crisis x Woman",
                               "Women in Parliament","Women in Parliament x f(time)",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Democracy Score x f(time)",
                               "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Trade Openness x f(time)", 
                               "Capital Openness"
                               ),
          type = "text")
```

```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_nph,
          se = lapply(model_list_nph, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B13: Tenure Models: Relaxing PH Assumption ",
          order = c(1:5, 15, 6:8, 16, 9:13, 17, 14),
          covariate.labels = c("Banking Crisis", "Banking Crisis x f(time)",
                               "Woman Minister", "Crisis x Woman",
                               "Women in Parliament","Women in Parliament x f(time)",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Democracy Score x f(time)",
                               "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Trade Openness x f(time)", 
                               "Capital Openness"
                               ),
          type = "html", 
          out = "Tables/tableB13.html")
```


### Figure B4

We visualize the effects of crisis on tenure over time for Models 1 and 4.


Model 1

```{r}
est_m1_3 <- calcNPH(fit = fmt_m1nph3, tfunc = "sqrt")

gm1 <- ggplot(data = est_m1_3, mapping = aes(x = time)) + 
  geom_hline(yintercept = 1, linetype = "dashed") + 
  geom_ribbon(aes(ymin = low95, ymax = upp95), alpha = 0.5) + 
  geom_line(aes(y = median)) + 
  scale_x_log10(breaks = c(1, 2, 4, 8, 16, 32, 64, 128)) + 
  labs(x = "Time since appointment (months)", 
       y = "Hazard ratio for banking crisis") + 
  theme_minimal() + 
  theme(axis.title = element_text(size = 14), 
        axis.text = element_text(size = 12)
        ) 

gm1_hist <- ggplot(df |> filter(tenure_d == 1)) + 
  geom_histogram(aes(x = tenure_t), color = "black", fill = "white") + 
  scale_x_log10() + 
  theme_void()

gm1_hist + 
  labs(title = "Model 1") + 
  theme(title = element_text(size = 18)) + 
  gm1 + 
  plot_layout(
    ncol = 1, 
    nrow = 2, 
    widths = c(4, 1),
    heights = c(1, 4)
  )
ggsave("Figures/FigureB4a_Model1.pdf", width = 5, height = 4)

```


Model 4


```{r}
calcNPHint <- function(fit, tfunc = "id", seed = 12345, nsims = 1000){
  if (tfunc == "id"){ft <- function(t) t}
  if (tfunc == "log"){ft <- function(t) log(t)}
  if (tfunc == "sqrt"){ft <- function(t) sqrt(t)}
  set.seed(seed)
  simb <- MASS::mvrnorm(n = nsims, mu = coef(fit), Sigma = vcov(fit))
  col_need <- which(colnames(simb) %in% c("crisis_mon", "tt(crisis_mon)", 
                                          "fm_gender", "gender_crisis", 
                                          "tt(gender_crisis)"))
  simb <- simb[, col_need]
  time <- seq(from = 1, to = 120, by = 3)
  xmat_men <- cbind(1, 0, 0, ft(time), 0)
  xmat_women <- cbind(1, 1, 1, ft(time), ft(time))
  xb_men <- exp(xmat_men %*% t(simb))
  xb_women <- exp(xmat_women %*% t(simb))
  cut_points <- c(0.025, 0.05, 0.5, 0.95, 0.975)
  est_men <- apply(xb_men, 1, quantile, cut_points) |> t()
  est_men <- cbind(time = time, est_men) |> as.data.frame()
  est_women <- apply(xb_women, 1, quantile, cut_points) |> t()
  est_women <- cbind(time = time, est_women) |> as.data.frame()
  colnames(est_men) <- colnames(est_women) <- c("time", "low95", "low90", "median", "upp90", "upp95")
  est_men <- est_men |> mutate(gender = "men")
  est_women <- est_women |> mutate(gender = "women")
  est <- rbind(est_men, est_women)
  return(est)
}

calcNPHint2 <- function(fit, tfunc = "id", seed = 12345, nsims = 1000){
  if (tfunc == "id"){ft <- function(t) t}
  if (tfunc == "log"){ft <- function(t) log(t)}
  if (tfunc == "sqrt"){ft <- function(t) sqrt(t)}
  set.seed(seed)
  simb <- MASS::mvrnorm(n = nsims, mu = coef(fit), Sigma = vcov(fit))
  col_need <- which(colnames(simb) %in% c("crisis_mon", "tt(crisis_mon)", 
                                          "fm_gender", "gender_crisis"))
  simb <- simb[, col_need]
  time <- seq(from = 1, to = 120, by = 3)
  xmat_men <- cbind(1, 0, 0, ft(time))
  xmat_women <- cbind(1, 1, 1, ft(time))
  xb_men <- exp(xmat_men %*% t(simb))
  xb_women <- exp(xmat_women %*% t(simb))
  cut_points <- c(0.025, 0.05, 0.5, 0.95, 0.975)
  est_men <- apply(xb_men, 1, quantile, cut_points) |> t()
  est_men <- cbind(time = time, est_men) |> as.data.frame()
  est_women <- apply(xb_women, 1, quantile, cut_points) |> t()
  est_women <- cbind(time = time, est_women) |> as.data.frame()
  colnames(est_men) <- colnames(est_women) <- c("time", "low95", "low90", "median", "upp90", "upp95")
  est_men <- est_men |> mutate(gender = "men")
  est_women <- est_women |> mutate(gender = "women")
  est <- rbind(est_men, est_women)
  return(est)
}

est_m4_3 <- calcNPHint2(fit = fmt_m4nph3, tfunc = "sqrt")

ggplot(data = est_m4_3, mapping = aes(x = time)) + 
  geom_hline(yintercept = 1, linetype = "dashed") + 
  geom_ribbon(aes(ymin = low95, ymax = upp95), alpha = 0.5) + 
  geom_line(aes(y = median)) + 
  scale_x_log10(breaks = c(1, 2, 4, 8, 16, 32, 64, 128)) + 
  labs(x = "Time since appointment (months)", 
       y = "Hazard ratio for banking crisis",
       title = "Model 4") + 
  theme_minimal() + 
  facet_wrap(gender ~ ., ncol = 2) + 
  theme(title = element_text(size = 18),
        axis.title = element_text(size = 14), 
        axis.text = element_text(size = 12),
        strip.text = element_text(size = 12))
ggsave("Figures/FigureB4b_Model4.pdf", width = 5, height = 4)
```


When does the effect for women become greater than 1 at 95% level? 

```{r}
est_m4_3 |> filter(gender == "women" & low95 > 1)
```




## Plot Hazard Ratio

Let's re-parameterize the interactive models to obtain coefficients for each combination of values.

```{r}
df <- df |> mutate(
  gender_cr = if_else(crisis_mon == 0 & fm_gender == 0, "Men, Crisis=0", "NA"),
  gender_cr = if_else(crisis_mon == 1 & fm_gender == 0, "Men, Crisis=1", gender_cr),
  gender_cr = if_else(crisis_mon == 0 & fm_gender == 1, "Women, Crisis=0", gender_cr),
  gender_cr = if_else(crisis_mon == 1 & fm_gender == 1, "Women, Crisis=1", gender_cr),
  gender_ap = if_else(crisis_appoint == 0 & fm_gender == 0, "Men, CrisisApp=0", "NA"),
  gender_ap = if_else(crisis_appoint == 1 & fm_gender == 0, "Men, CrisisApp=1", gender_ap),
  gender_ap = if_else(crisis_appoint == 0 & fm_gender == 1, "Women, CrisisApp=0", gender_ap),
  gender_ap = if_else(crisis_appoint == 1 & fm_gender == 1, "Women, CrisisApp=1", gender_ap),
  gen_cr_ap = if_else(crisis_appoint == 0 & crisis_mon == 0 & fm_gender == 0, "Men, A=0, C=0", "NA"),
  gen_cr_ap = if_else(crisis_appoint == 0 & crisis_mon == 0 & fm_gender == 1, 
                      "Women, A=0, C=0", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 0 & crisis_mon == 1 & fm_gender == 0, 
                      "Men, A=0, C=1", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 0 & crisis_mon == 1 & fm_gender == 1, 
                      "Women, A=0, C=1", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 1 & crisis_mon == 0 & fm_gender == 0, 
                      "Men, A=1, C=0", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 1 & crisis_mon == 0 & fm_gender == 1, 
                      "Women, A=1, C=0", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 1 & crisis_mon == 1 & fm_gender == 0, 
                      "Men, A=1, C=1", gen_cr_ap),
  gen_cr_ap = if_else(crisis_appoint == 1 & crisis_mon == 1 & fm_gender == 1, 
                      "Women, A=1, C=1", gen_cr_ap),
 gender_cr_11 = if_else(gender_cr == "Women, Crisis=1", "1 Women, Crisis=1", gender_cr),
 gender_ap_11 = if_else(gender_ap == "Women, CrisisApp=1", "1 Women, CrisisApp=1", gender_ap)
)
```

Estimate models that are equivalent but alternatively parameterized.

```{r}
fmt_m4a <- update(fmt_m1, update(fml_tenure, . ~ gender_cr))
fmt_m4b <- update(fmt_m1, update(fml_tenure, . ~ gender_cr_11))
fmt_m6a <- update(fmt_m5, update(fml_tenure_cov, . ~ gender_cr + .))
fmt_m6b <- update(fmt_m5, update(fml_tenure_cov, . ~ gender_cr_11 + .))
model_list_46 <- list(fmt_m4, fmt_m4a, fmt_m4b, fmt_m6, fmt_m6a, fmt_m6b)
```

Check the equivalence

```{r, warning = FALSE, echo = FALSE, eval = FALSE}
stargazer(model_list_46,
          se = lapply(model_list_46, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")

```


```{r, warning = FALSE, echo = FALSE, eval = FALSE}
model_list_46 <- list(fmt_m4a, fmt_m4b, fmt_m6a, fmt_m6b)
stargazer(model_list_46,
          se = lapply(model_list_46, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          keep = c("gender_cr*"),
          report = ("vc*p"),
          column.labels = c("Men/noC", "Women/C", "Men/noC", "Women/C"),
          type = "text")
```



```{r, warning = FALSE, results = 'asis'}
model_list_46 <- list(fmt_m4a, fmt_m4b, fmt_m6a, fmt_m6b)
stargazer(model_list_46,
          se = lapply(model_list_46, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          keep = c("gender_cr*"),
          report = ("vc*p"),
          column.labels = c("Men/noC", "Women/C", "Men/noC", "Women/C"),
          type = "html")
```

Show exponentiated coefficients (Hazard ratio).

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
model.list <- list(fmt_m6a, fmt_m6b)
stargazer(model.list,
          apply.coef = exp, # Report exponentiated coef = hazard ratio
          p.auto = FALSE,   # necessary to stop reporting misleading p values
          se = lapply(model.list, get_clse_or),
          report = ("vc*p"),
          keep = c("gender_cr*"),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          type = "text")
```

### Figure 4


```{r}
df_hr <- summary(fmt_m6a) $ conf.int[c("gender_crMen, Crisis=1", 
                                       "gender_crWomen, Crisis=0", 
                                       "gender_crWomen, Crisis=1"),] |> 
  as.data.frame() |> 
  select(hr = "exp(coef)", low = "lower .95", up = "upper .95") |> 
  mutate(gender = c("Men", "Women", "Women"),
         crisis = c("Crisis = 1", "Crisis = 0", "Crisis = 1"), 
         gc = c("Men\nCrisis", "Women\nNo Crisis", "Women\nCrisis"),
         signif = if_else((low > 1 & up > 1) | (low < 1 & up < 1), "Yes", "No"),
         mnum = c(3,2,1))
df_hr
```

```{r}
ggplot(data = df_hr, 
       mapping = aes(x = hr, y = reorder(gc, mnum))) + 
  geom_vline(xintercept = 1, linetype = 2, alpha = 0.4) + 
  geom_segment(aes(x = low, xend = up, yend = reorder(gc, mnum),
                   color = signif), size = 1.5) + 
  geom_label(aes(label = round(hr, 2), color = signif)) + 
  xlim(0.5, 2) + 
  scale_color_grey(start = 0.6, end = 0) + 
  guides(color = "none") + 
  labs(y = "", x = "Hazard ratio (men under no crisis as baseline)") + 
  theme_minimal() + 
  geom_segment(x = 1.625, xend = 1.625, y = 1, yend = 2) +
  geom_segment(x = 1.575, xend = 1.625, y = 1, yend = 1) +
  geom_segment(x = 1.575, xend = 1.625, y = 2, yend = 2) +
  geom_text(x = 1.525,  y = 1.5, label = "p = 0.57", angle = 0) + 
  geom_segment(x = 1.875, xend = 1.875, y = 1, yend = 3) +
  geom_segment(x = 1.825, xend = 1.875, y = 1, yend = 1) +
  geom_segment(x = 1.825, xend = 1.875, y = 3, yend = 3) +
  geom_text(x = 1.775,  y = 2.5, label = "p = 0.11", angle = 0) + 
  theme(axis.title = element_text(size = 12), 
        axis.text = element_text(size = 12))
ggsave("Figures/Figure4_TenureByGender.pdf", width = 8, height = 4)
```


## Outlier Analysis


```{r}
spell_lev <- df |> 
  filter(fm_name != "Unknown") |> 
  group_by(spellid) |> 
  summarise(time = max(tenure_t), 
            fail = max(tenure_d), 
            decade = max(decade), # decade of termination
            year = max(year),     # year of termination
            decade0 = min(decade),    # decade of appointment
            year0 = min(year),    # year of appointment
            fm_gender = mean(fm_gender), 
            fm_gender_f = if_else(fm_gender == 1, "Woman", "Man"), 
            left_trunc = min(left_trunc),
            right_cens_max = max(right_cens)
            )
```


```{r}
spell_lev |> filter(time > 120 & fm_gender_f == "Woman")
```


Longest serving woman (223.3 months)

```{r}
df |> filter(spellid == 2385) |> 
  select(fm_name, fm_gender, country_name) |> unique()
```

Second longest serving woman (180.2 months)

```{r}
df |> filter(spellid == 728) |> 
  select(fm_name, fm_gender, country_name) |> unique()

```

Third longest serving woman (142.1 months)

```{r}
df |> filter(spellid == 2070) |> 
  select(fm_name, fm_gender, country_name) |> unique()
```


- 2385 (longest serving one in PRK) is not in the data for the full model  
- 728 (second longest serving one in DMA) is also not in the data for the full model  
- 2070 (third longest serving one in NAM)  

```{r}
cox_o1 <- update(fmt_m6, data = df |> filter(spellid != 2070))
cox_o2 <- update(fmt_m6, data = df |> filter(iso3c != "NAM"))

model_list_tblB7 <- list(fmt_m6, cox_o1, cox_o2)
model_list_tblB7 <- lapply(model_list_tblB7, add_AIC)
```

### Table B7

```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tblB7,
          se = lapply(model_list_tblB7, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Woman Minister",
                               "Crisis x Woman",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B7: Tenure Model Robustness Check – Dropping Outliers, 1972-2017",
          type = "html",
          out = "Tables/tableB7.html")
```


Calculate Ns


```{r}
calc_NAM0 <- df |> filter(tenure_st == 1 & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(inflate_lag) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_NAM0 $ fail)
```


```{r}
calc_NAM1 <- df |> filter(tenure_st == 1 & 
                           spellid != 2070 & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(inflate_lag) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_NAM1 $ fail)
```


```{r}
calc_NAM2 <- df |> filter(tenure_st == 1 & 
                           iso3c != "NAM" & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(inflate_lag) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_NAM2 $ fail)
```


Average tenure for men and women in Namibia


```{r}
spell_NAM <- df |> 
  filter(fm_name != "Unknown") |> 
  filter(iso3c == "NAM") |> 
  group_by(spellid) |> 
  summarise(time = max(tenure_t), 
            fail = max(tenure_d), 
            decade = max(decade), # decade of termination
            year = max(year),     # year of termination
            decade0 = min(decade),    # decade of appointment
            year0 = min(year),    # year of appointment
            fm_gender = mean(fm_gender), 
            fm_gender_f = if_else(fm_gender == 1, "Woman", "Man"), 
            left_trunc = min(left_trunc),
            right_cens_max = max(right_cens)
            )
```

```{r}
spell_NAM |> filter(fm_gender_f == "Woman")
```

```{r}
spell_NAM |> filter(fm_gender_f == "Man" & fail == 1) |> summarise(time = mean(time))
```


## Control for Election Months, Crisis Appointment, and Decade FE

```{r}
fmt_m7 <- update(fmt_m5, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + 
                                  gender_crisis + elec_leg + elec_exe + .))
fmt_m8 <- update(fmt_m5, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + 
                                  gender_crisis + crisis_appoint + .))
fmt_m6_FE <- update(fmt_m6, update(fml_tenure_cov, . ~ crisis_mon + fm_gender + 
                                     gender_crisis + . + as.factor(decade80)))

model_list_tblB8 <- list(fmt_m6, fmt_m7, fmt_m8, fmt_m6_FE)
model_list_tblB8 <- lapply(model_list_tblB8, add_AIC)
```


### Table B8

Show hazard rate coefficients.

```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tblB8,
          se = lapply(model_list_tblB8, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Woman Minister",
                               "Crisis x Woman",
                               "Legislative Election", "Executive Election",
                               "Crisis Appointment",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness", 
                               "1990s", "2000s", "2010s"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Robustness Check - Additional Controls",
          type = "text")
```




```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tblB8,
          se = lapply(model_list_tblB8, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Banking Crisis", 
                               "Woman Minister",
                               "Crisis x Woman",
                               "Legislative Election", "Executive Election",
                               "Crisis Appointment",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness", 
                               "1990s", "2000s", "2010s"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B8: Tenure Model Robustness Checks - Additional Controls, 1972-2017",
          type = "html", 
          out = "Tables/tableB8.html")
```




Calculate Ns for the crisis appoint model

```{r}
calc_n_3 <- df |> filter(tenure_st == 1 & !is.na(crisis_appoint) &
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(inflate_lag) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_n_3 $ fail) # 2250 - 2044
```



## Currency, Inflation, and Unemployment Crises


```{r}
fml_tenure_cov <- formula(Surv(tenure_t0, tenure_t, tenure_d) ~ 
                            womenrep_lag + leadgender + 
                            presidential + xm_qudsest + unified_aut + 
                            growth_lag + inflate_lag + loggdppc_lag + 
                            tradepergdp_i_lag + kaopen_i_lag)
fml_tenure_inf <- formula(Surv(tenure_t0, tenure_t, tenure_d) ~ 
                            womenrep_lag + leadgender + 
                            presidential + xm_qudsest + unified_aut + 
                            growth_lag + loggdppc_lag + 
                            tradepergdp_i_lag + kaopen_i_lag)

## Define interaction terms
df <- df |> mutate(
 gender_currency = currency_mon0 * fm_gender, 
 gender_crisis20 = crisis20 * fm_gender,
 gender_unemp = cri_avgsd5_roll * fm_gender
)
## Currency
cox_c0 <- update(fmt_m5, update(
  fml_tenure_cov, . ~ fm_gender + crisis_mon + gender_crisis + .))
cox_c1 <- update(fmt_m5, update(
  fml_tenure_cov, . ~ fm_gender + currency_mon0 + gender_currency + .))
cox_i0 <- update(fmt_m5, update(
  fml_tenure_inf, . ~ fm_gender + crisis_mon + gender_crisis + .))
cox_i1 <- update(fmt_m5, update(
  fml_tenure_inf, . ~ fm_gender + crisis20 + gender_crisis20 + .))
cox_u0 <- update(fmt_m5, update(
  fml_tenure_inf, . ~ fm_gender + crisis_mon + gender_crisis + .), 
  data = df |> filter(!is.na(cri_avgsd5_roll)))
cox_u1 <- update(cox_u0, update(
  fml_tenure_inf, . ~ fm_gender + cri_avgsd5_roll + gender_unemp + .
))

model_list_tblB9 <- list(cox_c0, cox_c1, cox_i0, cox_i1, cox_u0, cox_u1)
model_list_tblB9 <- lapply(model_list_tblB9, add_AIC)

```


```{r, echo = FALSE, eval = FALSE, warning = FALSE}
stargazer(model_list_tblB9,
          se = lapply(model_list_tblB9, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Woman Minister",
                               "Banking Crisis", "Crisis x Woman",
                               "Currency Crisis", "Crisis x Woman",
                               "Inflation Crisis", "Crisis x Woman",
                               "Unemployment Crisis", "Crisis x Woman",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Risk of a Finance Minister Leaving Office, 1972-2017",
          type = "text")
```

```{r, warning = FALSE, results = 'asis'}
stargazer(model_list_tblB9,
          se = lapply(model_list_tblB9, get_clse_hr),
          omit.stat = c("rsq", "max.rsq", "wald", "lr", "logrank"),
          covariate.labels = c("Woman Minister",
                               "Banking Crisis", "Crisis x Woman",
                               "Currency Crisis", "Crisis x Woman",
                               "Inflation Crisis", "Crisis x Woman",
                               "Unemployment Crisis", "Crisis x Woman",
                               "Women in Parliament",
                               "Woman Head of Government", "Presidential",
                               "Democracy Score", "Unified Government",
                               "Economic Growth", "Inflation", "GDP per Capita",
                               "Trade Openness", "Capital Openness"
                               ),
          dep.var.caption = c("Hazard Rate (> 0 means shorter duration)"),
          dep.var.labels.include = FALSE,
          title = "Table B9: Tenure Model Robustness Checks, Currency, Inflation, and Unemployment Crises",
          type = "html",
          out = "Tables/tableB9.html")
```



Calculate N for column 3

```{r}
calc_c3 <- df |> filter(tenure_st == 1 & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_c3 $ fail)
```

Calculate N for column 4

```{r}
calc_c4 <- df |> filter(tenure_st == 1 & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(crisis20) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_c4 $ fail)
```


Calculate N for columns 5 and 6

```{r}
calc_c5 <- df |> filter(tenure_st == 1 & 
                           !is.na(womenrep_lag) & 
                           !is.na(presidential) & !is.na(xm_qudsest) & 
                           !is.na(unified_aut) & !is.na(cri_avgsd5_roll) & 
                           !is.na(tradepergdp_i_lag) & !is.na(kaopen_i_lag)) |> 
  group_by(spellid) |> 
  summarise(fail = max(tenure_d),
            right_cens_max = max(right_cens)
            )

table(calc_c5 $ fail)
```


# 4. Appendix C

## Table C1

Gender

```{r}
table(df_appC $ ministergender, useNA = "ifany")
```
Advanced Economics Education

```{r}
with(df_appC, table(advanced_econ_degree, ministergender, useNA = "ifany"))
```

Advanced Economics Education

```{r}
with(df_appC, table(phd_econ, ministergender, useNA = "ifany"))
```

Technocrats

```{r}
with(df_appC, table(technocrats, ministergender, useNA = "ifany"))
```



## Table C2 

Test whether a minister was a technocrat prior to serving as a finance minister

```{r}
## by gender
tech_1 <-  df_appC |> 
  filter(!is.na(technocrats)) |> 
  group_by(ministergender) |> 
  summarise(n_obs = n(),
            mean = mean(technocrats),
            se = sd(technocrats)/sqrt(n_obs)) |> 
  mutate(ministergender = if_else(ministergender == 1, "Women", "Men"))

## combined
tech_2 <- df_appC |> 
  filter(!is.na(technocrats)) |> 
  summarise(n_obs = n(), 
            mean = mean(technocrats),
            se = sd(technocrats)/sqrt(n_obs)) |> 
  mutate(ministergender = "Combined") |> 
  select(ministergender, n_obs, mean, se)

rbind(tech_1, tech_2)
```

T-test 

```{r}
t.test(technocrats ~ ministergender, var.equal = TRUE, data = df_appC)
```

We could get equivalent results using linear regression.

```{r}
lm(technocrats ~ ministergender, data = df_appC) |> summary()
```


## Table C3

Test whether a minister has an advanced degree in economics 

```{r}
## by gender
adve_1 <-  df_appC |> 
  filter(!is.na(advanced_econ_degree)) |> 
  group_by(ministergender) |> 
  summarise(n_obs = n(),
            mean = mean(advanced_econ_degree),
            se = sd(advanced_econ_degree)/sqrt(n_obs)) |> 
  mutate(ministergender = if_else(ministergender == 1, "Women", "Men"))

## combined
adve_2 <- df_appC |> 
  filter(!is.na(advanced_econ_degree)) |> 
  summarise(n_obs = n(), 
            mean = mean(advanced_econ_degree),
            se = sd(advanced_econ_degree)/sqrt(n_obs)) |> 
  mutate(ministergender = "Combined") |> 
  select(ministergender, n_obs, mean, se)

rbind(adve_1, adve_2)
```

T-test 

```{r}
t.test(advanced_econ_degree ~ ministergender, var.equal = TRUE, data = df_appC)
```

We could get equivalent results using linear regression.

```{r}
lm(advanced_econ_degree ~ ministergender, data = df_appC) |> summary()
```

## Table C4

Test whether a minister has a Ph.D. in economics 

```{r}
## by gender
phd_1 <-  df_appC |> 
  filter(!is.na(phd_econ)) |> 
  group_by(ministergender) |> 
  summarise(n_obs = n(),
            mean = mean(phd_econ),
            se = sd(phd_econ)/sqrt(n_obs)) |> 
  mutate(ministergender = if_else(ministergender == 1, "Women", "Men"))

## combined
phd_2 <- df_appC |> 
  filter(!is.na(phd_econ)) |> 
  summarise(n_obs = n(), 
            mean = mean(phd_econ),
            se = sd(phd_econ)/sqrt(n_obs)) |> 
  mutate(ministergender = "Combined") |> 
  select(ministergender, n_obs, mean, se)

rbind(phd_1, phd_2)
```

T-test 

```{r}
t.test(phd_econ ~ ministergender, var.equal = TRUE, data = df_appC)
```

We could get equivalent results using linear regression.

```{r}
lm(phd_econ ~ ministergender, data = df_appC) |> summary()
```


# Session Information

<details><summary>Session Info</summary>

```{r sessionInfo}
Sys.time()
sessionInfo()
```

</details>
